/*****************************************************************************
 * Copyright (C) Codehaus.org                                                *
 * ------------------------------------------------------------------------- *
 * Licensed under the Apache License, Version 2.0 (the "License");           *
 * you may not use this file except in compliance with the License.          *
 * You may obtain a copy of the License at                                   *
 *                                                                           *
 * http://www.apache.org/licenses/LICENSE-2.0                                *
 *                                                                           *
 * Unless required by applicable law or agreed to in writing, software       *
 * distributed under the License is distributed on an "AS IS" BASIS,         *
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  *
 * See the License for the specific language governing permissions and       *
 * limitations under the License.                                            *
 *****************************************************************************/
package com.googlecode.lazyparsec;

import com.googlecode.lazyparsec.pattern.CharacterPredicates;
import com.googlecode.lazyparsec.pattern.Pattern;
import com.googlecode.lazyparsec.pattern.Patterns;
import com.googlecode.totallylazy.Predicate;

/**
 * Provides common {@link Parser} implementations that scan the source and match certain string
 * patterns.
 * <p/>
 * Some scanners like {@link #IDENTIFIER} and {@link #INTEGER} return the matched string,
 * while others like {@link #WHITESPACES} return nothing, as indicated by the {@link Void}
 * type parameter. In case the matched string is still needed nontheless,
 * use the {@link Parser#source()} method.
 *
 * @author Ben Yu
 */
public final class Scanners {

    /**
     * A scanner that scans greedily for 1 or more whitespace characters.
     */
    public static final Parser<Void> WHITESPACES =
            pattern(Patterns.many1(CharacterPredicates.IS_WHITESPACE), "whitespaces");

    /**
     * Matches any character in the input. Different from {@link Parsers#always()},
     * it fails on EOF. Also it consumes the current character in the input.
     */
    public static final Parser<Void> ANY_CHAR = new AnyCharScanner("any character");

    /**
     * Scanner for c++/java style line comment.
     */
    public static final Parser<Void> JAVA_LINE_COMMENT = lineComment("//");

    /**
     * Scanner for SQL style line comment.
     */
    public static final Parser<Void> SQL_LINE_COMMENT = lineComment("--");

    /**
     * Scanner for haskell style line comment. ({@code --})
     */
    public static final Parser<Void> HASKELL_LINE_COMMENT = lineComment("--");

    private static final Parser<Void> JAVA_BLOCK_COMMENTED =
            pattern(notChar2('*', '/').many(), "commented block");

    /**
     * Scanner for c++/java style block comment.
     */
    public static final Parser<Void> JAVA_BLOCK_COMMENT =
            Parsers.sequence(string("/*"), JAVA_BLOCK_COMMENTED, string("*/"));

    /**
     * Scanner for SQL style block comment.
     */
    public static final Parser<Void> SQL_BLOCK_COMMENT =
            Parsers.sequence(string("/*"), JAVA_BLOCK_COMMENTED, string("*/"));

    /**
     * Scanner for haskell style block comment. {- -}
     */
    public static final Parser<Void> HASKELL_BLOCK_COMMENT = Parsers.sequence(
            string("{-"), pattern(notChar2('-', '}').many(), "commented block"), string("-}"));

    /**
     * Scanner with a pattern for SQL style string literal. A SQL string literal
     * is a string quoted by single quote, a single quote character is escaped by
     * 2 single quotes.
     */
    public static final Parser<String> SINGLE_QUOTE_STRING = quotedBy(
            pattern(Patterns.regex("(('')|[^'])*"), "quoted string"), isChar('\'')).source();

    /**
     * Scanner with a pattern for double quoted string literal. Backslash '\' is
     * used as escape character.
     */
    public static final Parser<String> DOUBLE_QUOTE_STRING = quotedBy(
            pattern(Patterns.regex("((\\\\.)|[^\"\\\\])*"), "quoted string"), Scanners.isChar('"'))
            .source();

    /**
     * Scanner for a c/c++/java style character literal. such as 'a' or '\\'.
     */
    public static final Parser<String> SINGLE_QUOTE_CHAR = quotedBy(
            pattern(Patterns.regex("(\\\\.)|[^'\\\\]"), "quoted char"), Scanners.isChar('\''))
            .source();

    /**
     * Scanner for the c++/java style delimiter of tokens. For example,
     * whitespaces, line comment and block comment.
     */
    public static final Parser<Void> JAVA_DELIMITER =
            Parsers.plus(WHITESPACES, JAVA_LINE_COMMENT, JAVA_BLOCK_COMMENT).skipMany();

    /**
     * Scanner for the haskell style delimiter of tokens. For example,
     * whitespaces, line comment and block comment.
     */
    public static final Parser<Void> HASKELL_DELIMITER =
            Parsers.plus(WHITESPACES, HASKELL_LINE_COMMENT, HASKELL_BLOCK_COMMENT).skipMany();

    /**
     * Scanner for the SQL style delimiter of tokens. For example, whitespaces and
     * line comment.
     */
    public static final Parser<Void> SQL_DELIMITER =
            Parsers.plus(WHITESPACES, SQL_LINE_COMMENT, SQL_BLOCK_COMMENT).skipMany();

    /**
     * Scanner for a regular identifier, that starts with either
     * an underscore or an alpha character, followed by 0 or more alphanumeric characters.
     */
    public static final Parser<String> IDENTIFIER = pattern(Patterns.WORD, "word").source();

    /**
     * Scanner for an integer.
     */
    public static final Parser<String> INTEGER = pattern(Patterns.INTEGER, "integer").source();

    /**
     * Scanner for a decimal number.
     */
    public static final Parser<String> DECIMAL = pattern(Patterns.DECIMAL, "decimal").source();

    /**
     * Scanner for a decimal number. 0 is not allowed as the leading digit.
     */
    public static final Parser<String> DEC_INTEGER =
            pattern(Patterns.DEC_INTEGER, "decimal integer").source();

    /**
     * Scanner for a octal number. 0 is the leading digit.
     */
    public static final Parser<String> OCT_INTEGER =
            pattern(Patterns.OCT_INTEGER, "octal integer").source();

    /**
     * Scanner for a hexadecimal number. Has to start with {@code 0x} or {@code 0X}.
     */
    public static final Parser<String> HEX_INTEGER =
            pattern(Patterns.HEX_INTEGER, "hexadecimal integer").source();

    /**
     * Scanner for a scientific notation.
     */
    public static final Parser<String> SCIENTIFIC_NOTATION =
            pattern(Patterns.SCIENTIFIC_NOTATION, "scientific notation").source();

    /**
     * A scanner that scans greedily for 0 or more characters that satisfies the given Predicate<Character>.
     *
     * @param predicate the predicate object.
     * @return the Parser object.
     */
    public static Parser<Void> many(Predicate<Character> predicate) {
        return pattern(Patterns.isChar(predicate).many(), predicate + "*");
    }

    /**
     * A scanner that scans greedily for 1 or more characters that satisfies the given Predicate<Character>.
     *
     * @param predicate the predicate object.
     * @return the Parser object.
     */
    public static Parser<Void> many1(Predicate<Character> predicate) {
        return pattern(Patterns.many1(predicate), predicate + "+");
    }

    /**
     * A scanner that scans greedily for 0 or more occurrences of the given pattern.
     *
     * @param pattern the pattern object.
     * @param name    the name of what's expected logically. Is used in error message.
     * @return the Parser object.
     */
    public static Parser<Void> many(Pattern pattern, String name) {
        return pattern(pattern.many(), name);
    }

    /**
     * A scanner that scans greedily for 1 or more occurrences of the given pattern.
     *
     * @param pattern the pattern object.
     * @param name    the name of what's expected logically. Is used in error message.
     * @return the Parser object.
     */
    public static Parser<Void> many1(Pattern pattern, String name) {
        return pattern(pattern.many1(), name);
    }

    /**
     * Matches the input against the specified string.
     *
     * @param str the string to match
     * @return the scanner.
     */
    public static Parser<Void> string(String str) {
        return string(str, str);
    }

    /**
     * Matches the input against the specified string.
     *
     * @param str  the string to match
     * @param name the name of what's expected logically. Is used in error message.
     * @return the scanner.
     */
    public static Parser<Void> string(String str, String name) {
        return pattern(Patterns.string(str), name);
    }

    /**
     * A scanner that scans the input for an occurrence of a string pattern.
     *
     * @param pattern the pattern object.
     * @param name    the name of what's expected logically. Is used in error message.
     * @return the Parser object.
     */
    public static Parser<Void> pattern(Pattern pattern, String name) {
        return new PatternScanner(name, pattern);
    }

    /**
     * A scanner that matches the input against the specified string case insensitively.
     *
     * @param str  the string to match
     * @param name the name of what's expected logically. Is used in error message.
     * @return the scanner.
     */
    public static Parser<Void> stringCaseInsensitive(String str, String name) {
        return pattern(Patterns.stringCaseInsensitive(str), name);
    }

    /**
     * A scanner that matches the input against the specified string case insensitively.
     *
     * @param str the string to match
     * @return the scanner.
     */
    public static Parser<Void> stringCaseInsensitive(String str) {
        return stringCaseInsensitive(str, str);
    }

    /**
     * A scanner that succeeds and consumes the current character if it satisfies the given
     * {@link Predicate<Character>}.
     *
     * @param predicate the predicate.
     * @return the scanner.
     */
    public static Parser<Void> isChar(Predicate<Character> predicate) {
        return isChar(predicate, predicate.toString());
    }

    /**
     * A scanner that succeeds and consumes the current character if it satisfies the given
     * {@link Predicate<Character>}.
     *
     * @param predicate the predicate.
     * @param name      the name of what's expected logically. Is used in error message.
     * @return the scanner.
     */
    public static Parser<Void> isChar(Predicate<Character> predicate, String name) {
        return new IsCharScanner(name, predicate);
    }

    /**
     * A scanner that succeeds and consumes the current character if it is equal to {@code ch}.
     *
     * @param ch   the expected character.
     * @param name the name of what's expected logically. Is used in error message.
     * @return the scanner.
     */
    public static Parser<Void> isChar(char ch, String name) {
        return isChar(CharacterPredicates.isChar(ch), name);
    }

    /**
     * A scanner that succeeds and consumes the current character if it is equal to {@code ch}.
     *
     * @param ch the expected character.
     * @return the scanner.
     */
    public static Parser<Void> isChar(char ch) {
        return isChar(ch, Character.toString(ch));
    }

    /**
     * A scanner that succeeds and consumes the current character if it is equal to {@code ch}.
     *
     * @param ch   the expected character.
     * @param name the name of what's expected logically. Is used in error mesage.
     * @return the scanner.
     */
    public static Parser<Void> notChar(char ch, String name) {
        return isChar(CharacterPredicates.notChar(ch), name);
    }

    /**
     * A scanner that succeeds and consumes the current character if it is not equal to {@code ch}.
     *
     * @param ch the expected character.
     * @return the scanner.
     */
    public static Parser<Void> notChar(char ch) {
        return notChar(ch, "^" + ch);
    }

    /**
     * A scanner that succeeds and consumes the current character if it equals to any character in
     * {@code chars}.
     *
     * @param chars the characters.
     * @param name  the name of what's expected logically. Is used in error message.
     * @return the scanner.
     */
    public static Parser<Void> among(String chars, String name) {
        return isChar(CharacterPredicates.among(chars), name);
    }

    /**
     * A scanner that succeeds and consumes the current character if it equals to any character in
     * {@code chars}.
     */
    public static Parser<Void> among(String chars) {
        if (chars.length() == 0) return isChar(CharacterPredicates.NEVER);
        if (chars.length() == 1) return isChar(chars.charAt(0));
        return isChar(CharacterPredicates.among(chars));
    }

    /**
     * A scanner that succeeds and consumes the current character if it is not equal to any character
     * in {@code chars}.
     *
     * @param chars the characters.
     * @param name  the name of what's expected logically. Is used in error message.
     * @return the scanner.
     */
    public static Parser<Void> notAmong(String chars, String name) {
        return isChar(CharacterPredicates.notAmong(chars), name);
    }

    /**
     * A scanner that succeeds and consumes the current character if it is not equal to any character
     * in {@code chars}.
     */
    public static Parser<Void> notAmong(String chars) {
        if (chars.length() == 0) return ANY_CHAR;
        if (chars.length() == 1) return notChar(chars.charAt(0));
        return isChar(CharacterPredicates.notAmong(chars));
    }

    /**
     * A scanner that succeeds and consumes all the characters until the {@code '\n'} character
     * if the current input starts with the string literal {@code begin}. The {@code '\n'} character
     * isn't consumed.
     */
    public static Parser<Void> lineComment(String begin) {
        return pattern(Patterns.lineComment(begin), begin);
    }

    /**
     * A scanner for non-nested block comment that starts with {@code begin} and ends with
     * {@code end}.
     */
    public static Parser<Void> blockComment(String begin, String end) {
        Pattern opening = Patterns.string(begin).next(Patterns.notString(end).many());
        return pattern(opening, begin).next(string(end));
    }

    /**
     * A scanner for a non-nestable block comment that starts with {@code begin} and ends with
     * {@code end}.
     *
     * @param begin     begins a block comment
     * @param end       ends a block comment
     * @param commented the commented pattern.
     * @return the Scanner for the block comment.
     */
    public static Parser<Void> blockComment(String begin, String end, Pattern commented) {
        Pattern opening = Patterns.string(begin)
                .next(Patterns.string(end).not().next(commented).many());
        return pattern(opening, begin).next(string(end));
    }

    /**
     * A scanner for a non-nestable block comment that starts with {@code begin} and ends with
     * {@code end}.
     *
     * @param begin     begins a block comment
     * @param end       ends a block comment
     * @param commented the commented pattern.
     * @return the Scanner for the block comment.
     */
    public static Parser<Void> blockComment(Parser<Void> begin, Parser<Void> end, Parser<?> commented) {
        return Parsers.sequence(begin, end.not().next(commented).skipMany(), end);
    }

    /**
     * A scanner for a nestable block comment that starts with {@code begin} and ends with
     * {@code end}.
     *
     * @param begin begins a block comment
     * @param end   ends a block comment
     * @return the block comment scanner.
     */
    public static Parser<Void> nestableBlockComment(String begin, String end) {
        return nestableBlockComment(begin, end, Patterns.isChar(CharacterPredicates.ALWAYS));
    }

    /**
     * A scanner for a nestable block comment that starts with {@code begin} and ends with
     * {@code end}.
     *
     * @param begin     begins a block comment
     * @param end       ends a block comment
     * @param commented the commented pattern except for nested comments.
     * @return the block comment scanner.
     */
    public static Parser<Void> nestableBlockComment(String begin, String end, Pattern commented) {
        return nestableBlockComment(
                string(begin), string(end), pattern(commented, "commented"));
    }

    /**
     * A scanner for a nestable block comment that starts with {@code begin} and ends with
     * {@code end}.
     *
     * @param begin     starts a block comment
     * @param end       ends a block comment
     * @param commented the commented pattern except for nested comments.
     * @return the block comment scanner.
     */
    public static Parser<Void> nestableBlockComment(
            Parser<?> begin, Parser<?> end, Parser<?> commented) {
        return new NestableBlockCommentScanner(begin, end, commented);
    }

    /**
     * A scanner for a quoted string that starts with character {@code begin} and ends with character
     * {@code end}.
     */
    public static Parser<String> quoted(char begin, char end) {
        Pattern beforeClosingQuote =
                Patterns.isChar(begin).next(Patterns.many(CharacterPredicates.notChar(end)));
        return pattern(beforeClosingQuote, Character.toString(begin)).next(isChar(end)).source();
    }

    /**
     * A scanner for a quoted string that starts with {@code begin} and ends with {@code end}.
     *
     * @param begin  begins a quote
     * @param end    ends a quote
     * @param quoted the parser that recognizes the quoted pattern.
     * @return the scanner.
     */
    public static Parser<String> quoted(Parser<Void> begin, Parser<Void> end, Parser<?> quoted) {
        return Parsers.sequence(begin, quoted.skipMany(), end).source();
    }

    /**
     * A scanner that after character level {@code outer} succeeds,
     * subsequently feeds the recognized characters to {@code inner} for a nested scanning.
     * <p/>
     * <p> Is useful for scenaios like parsing string interpolation grammar.
     */
    public static Parser<Void> nestedScanner(Parser<?> outer, Parser<Void> inner) {
        return new NestedScanner(outer, inner);
    }

    /**
     * Matches a character if the input has at least 1 character, or if the input has at least 2
     * characters with the first 2 characters not being {@code c1} and {@code c2}.
     *
     * @return the Pattern object.
     */
    private static Pattern notChar2(final char c1, final char c2) {
        return new Pattern() {
            @Override
            public int match(CharSequence src, int begin, int end) {
                if (begin == end - 1) return 1;
                if (begin >= end) return MISMATCH;
                if (src.charAt(begin) == c1 && src.charAt(begin + 1) == c2) return Pattern.MISMATCH;
                return 1;
            }
        };
    }

    private static Parser<Void> quotedBy(Parser<Void> parser, Parser<?> quote) {
        return parser.between(quote, quote);
    }

    private Scanners() {
    }
}
